 ;;########################################################################
;; missd6.lsp
;; Copyright (c) 1998 by Pedro Valero (valerop@uv.es)
;; Code for multiple imputation of Missing data 
;; 
;; 
;;########################################################################

(defun my-logitreg-model (x y n &rest args)
"Args: (x y n &rest args)
Returns a logistic regression model (binomial regression model with logit
link). Accepts :OFFSET and :VERBOSE keywords in addition to the keywords
accepted by regression-model."
  (apply #'send my-logitregmodel-proto :new 
	 :x x :y y :trials n :link logit-link args)
  
 
  )

(defproto my-logitregmodel-proto '() () binomialreg-proto)
 (defmeth my-logitregmodel-proto :fit-means (&optional (eta (send self :eta)))
   (flet ((/zero (x) (if-else (> x 0.9999 ) 0.9999 x)))
     (let ((n (send self :trials))
          	(p (call-next-method eta)))
       (map-elements #'/zero (* n p)))))

;esta correcion impide que se produzcan errores por valores excesivamente ajustados de fit-means
;no me gusta mucho la verdad



(defmeth missing-data-model-object-proto :imputation ()
  
  (let* 
    (
     (data (copy-array (send self :data)))
     (data-random (copy-array data))
     (means-em (combine (send self :em-means)))
  
     (parametters (my-border-matrix 
                   (send self :emcovariance) means-em means-em (array-dimension data 0)))
     (rows-in-patterns (first (cases-in-missing-patterns (copy-array data))))
     (s (length rows-in-patterns))
     (p (- (array-dimension parametters 0) 1))    
     (c (make-array (list p 1)
                    :initial-element 0))
     (n-cases (array-dimension data 0))
     
    
     (patterns (patterns-missing data))
     
     (r (make-array (list s p) :initial-contents patterns))
     
     (mean-c-error (send self :compute-mean-c-error parametters 
                         s rows-in-patterns (copy-array data) r p patterns))
     )

  (dotimes (i s) 
             (dotimes (j p)

                      (if (and (equalp (aref r i j) 1) 
                               (> (aref parametters (+ 1 j) (+ 1 j)) 0))
                       
                          (setf parametters 
                                (select 
                                 (schafer-sweep-operator 
                                  parametters (list (+ 1 j))) 0))
                          )                           
                      (if  (and (equalp (aref r i j) 0) 
                                (< (aref parametters (+ 1 j) (+ 1 j)) 0))                              
                           (setf parametters 
                                 (select  
                                  (reverse-schafer-sweep-operator 
                                   parametters (list (+ 1 j))) 
                                  0))
                           ))
                             

             (dolist (m (select rows-in-patterns i))                            
                   (dolist (n (missing-in-missing-pattern patterns i))
                           
                                   (setf (select data m n)
                                         (sum (select parametters 0 (+ 1 n))
                                                       
                                              (* (select parametters (+ 1 n) 
                                                         (+ 1 (observed-in-missing-pattern  patterns i))) 
                                                 (select data m 
                                                         (observed-in-missing-pattern
                                                          patterns i)))))
                                    (setf (select data-random m n) (+ (select data m n)          
                                                                   
                                                           (* (select mean-c-error n) (first (normal-rand 1)))))
                                   )))
             
    (send self :mean-c-error mean-c-error)
             (list data data-random)))


(defmeth missing-data-model-object-proto :imputation-random-normal ()
  
  (let* 
    (
     (data (copy-array (send self :data)))
     (data-normal (copy-array (send self :imputed-data-normal)))
     (data-random (copy-array data-normal))
     (rows-in-patterns (first (cases-in-missing-patterns (copy-array data))))
     (s (length rows-in-patterns))   
     (patterns (patterns-missing data))     
     (mean-c-error (send self :mean-c-error))
     
     )

    (dotimes (i s) 
             (dolist (m (select rows-in-patterns i))                            
                     (dolist (n (missing-in-missing-pattern patterns i))
                             
                             (setf (select data-random m n)
                                   (+ (select data-normal m n)
                                      (* (select mean-c-error n) 
                                         (first (normal-rand 1)))))
                           
                                      )))
             
    data-random))


(defmeth missing-data-model-object-proto :imputation-close-random-normal ()
  
  (let* 
    (
    
     (data-normal (copy-array (send self :imputed-data-normal)))
     (data-random (copy-array data-normal))
     (data-close-random (copy-array data-normal))
     (rows-in-patterns (first (cases-in-missing-patterns (copy-array (send self :data)))))
     (s (length rows-in-patterns))   
     (patterns (patterns-missing (send self :data)))     
     (mean-c-error (send self :mean-c-error))
     (data (column-list (send self :data)))
     (var-now nil)
     
     )

    (dotimes (i s) 
             (setf var-now (non-missing (select data i)))
             (dolist (m (select rows-in-patterns i))                            
                     (dolist (n (missing-in-missing-pattern patterns i))
                             
                             (setf (select data-random m n)
                                   (+ (select data-normal m n)
                                      (* (select mean-c-error n) 
                                         (first (normal-rand 1)))))
                             (setf (select data-close-random m n)
                                   (select var-now
                                           (position 
                                            (min 
                                                (abs (- var-now
                                                        (select data-random m n))))
                                               (abs (- var-now
                                                       (select data-random m n))))))
                                  
                           
                                      )))
             
    data-close-random))


(defmeth missing-data-model-object-proto :compute-mean-c-error (parametters s rows-in-patterns data r p patterns)
"Computes EMC of the predicted values for observed values in data using the parametters estimated by EM. Useful to add a random component to imputed data smaller than using the variance of the variable. 
	Args:
	parametters parametters estimated by EM
	s	number of patterns
	rows-in-patterns rows in each pattern of missing data
	data	original data
	r r
	p number of variables
	patterns patterns of missing data"
  (let* (
         (parametters parametters)
         (s s)
         (rows-in-patterns rows-in-patterns)
         (data data)
         (r r)
         (predicted (make-array (array-dimensions data)))
         (p p)
         (patterns patterns)
         (observed-in-missing-pattern-list (observed-in-missing-pattern-list patterns))
         )
    (dotimes (i s) 
             (dotimes (j p)
                      (if (and (equalp (aref r i j) 1) 
                               (> (aref parametters (+ 1 j) (+ 1 j)) 0))
                          (setf parametters   (select (schafer-sweep-operator 
                                                       parametters (list (+ 1 j))) 0))
                          )                           
                      (if  (and (equalp (aref r i j) 0) 
                                (< (aref parametters (+ 1 j) (+ 1 j)) 0))                              
                           (setf parametters 
                                 (select  
                                  (reverse-schafer-sweep-operator 
                                   parametters (list (+ 1 j))) 
                                  0))
                           ))
                      

             

                     (dolist (n (select observed-in-missing-pattern-list  i))
                             (setf parametters 
                                   (select  
                                    (reverse-schafer-sweep-operator 
                                     parametters (list (+ 1 n))) 
                                    0))
                             (dolist (m (select rows-in-patterns i))
                                     (setf var-predictor (set-difference (select observed-in-missing-pattern-list  i) (list n)))
                                     (if var-predictor
                                         (setf (select predicted m n)
                                               (sum (select parametters 0 (+ 1 n))
                                                    (* (select parametters (+ 1 n) 
                                                               (+ 1 var-predictor)) 
                                                       (select data m 
                                                               var-predictor))))
                                 (setf (select predicted m n) (select parametters 0 (+ 1 n))))
                                     )
                             (setf parametters   (select (schafer-sweep-operator 
                                                          parametters (list (+ 1 n))) 0))
                             
                     )
             )
             

    (setf sum-square (map-elements #'reduce #'+
                                   (map-elements #'** 
                                     (- 
                                      (map-elements #'non-missing (column-list 
                                                                   data))
                                      (map-elements #'non-missing (column-list predicted))) 2)))
    
    (setf df (- (map-elements #'length (map-elements #'non-missing (column-list data))) p))
    (setf mean-c-error (sqrt (/ sum-square df)))
    
    mean-c-error)
  )
    
(defmeth missing-data-model-object-proto :multiple-imputation2 (creator &key (datasets 3))


  (let (
    (res nil)
        (datasets datasets)
        )


    (dotimes (i datasets)
             (setf res (column-list 
                                     (send self :imputation-random-normal)))
            (send self :add-dataset-multiple-imputed-list
                  (data (send self :name)          
                   :created  creator          
                   :title     (concatenate 'string "MI-"(send self :title))
                   :variables (send self :variables)  
         
                   :data     (combine (transpose (map-elements 'coerce res 'list)))
                   :labels (send self :labels)
                   )))
    )
  )

(defmeth missing-data-model-object-proto :multiple-imputation3 (creator &key (datasets 3))


  (let (
    (res nil)
        (datasets datasets)
        )


   
         (dotimes (i datasets)
             (setf res (column-list 
                                     (send self :imputation-close-random-normal)))
                  (send self :add-dataset-multiple-imputed-list
                        (data (send self :name)          
                              :created  creator          
                              :title     (concatenate 'string "MI-"(send self :title))
                              :variables (send self :variables)  
         
                              :data     (combine (transpose (map-elements 'coerce res 'list)))
                              :labels (send self :labels)
                   )))
    )
  )

(defmeth missing-data-model-object-proto :probability-missing ()
  (let*
    (
     
     (imputed-data-normal (column-list (send self :imputed-data-normal)))
     (missingness-matrix (column-list (send self :data-matrix-missing)))
     (n (1- (length (first imputed-data-normal))))
     (p (length imputed-data-normal))
      (missing-by-var (send self :missing-by-var))
      (probability-missing nil)
     )
     

   (dotimes (i p)
           
            (format t "Missing in variable:~25t~13,6g~%" (select (send self :variables) i))
            (setf probability-missing 
                  (append probability-missing
                          (if (select missing-by-var i)
                                                  (- 1 (list (send 
                                                              (my-logitreg-model 
                                                               (select imputed-data-normal 
                                                                       (set-difference 
                                                                        (iseq 0 (1- p)) (list i)))
                                                               (select missingness-matrix i)
                                                               1 
                                                               
                                                               :predictor-names 
                                                               (select (send self :variables)
                                                                       (remove i (iseq 0 (1- p))))
                                                              :response-name 
                                                               (select (send self :variables) i)
                                                               :verbose nil
                                                               )
                                                               
                                                              :fit-probabilities)))
                             (list (repeat 0 (1+ n)))
                              
                          )))
                
            
             )
    
   
    (make-array (list (1+ n) p) :initial-contents (combine (transpose probability-missing)))
    
  ))

(defmeth  missing-data-model-object-proto :multiple-imputation (creator &key (datasets 3))
  (let*
    (
     (datasets datasets)
    (missing-by-var (send self :missing-by-var))
     (data (column-list (send self :data)) )
     (n (length (first data)))
     (p (length missing-by-var))
     
     (present-by-var (map-elements 'set-difference 
                                    (repeat (list (iseq 0 (1- n))) p) missing-by-var ))
     (data-in-quintil (quintil-by-var (send self :probability-missing )))
     (missing-by-quintil nil)
     (present-by-quintil nil)
     (sampled-by-quintil nil)
    
                )
     
    
(dotimes (j datasets)
(dotimes (i p)
    
    (setf missing-by-quintil (map-elements #'intersection 
                                                  (select data-in-quintil i)
                                                  (repeat (list 
                                                           (select missing-by-var i))
                                                          5)
                                                  ))
         
    (setf present-by-quintil (map-elements #'intersection 
                                                  (select data-in-quintil i)
                                                  (repeat (list 
                                                           (select present-by-var i))
                                                          5)
                                                  ))
         
    ;Lo siguiente esta basado en que el primer y el ltimo quintil no pueden estar vacios
             ;el tercer quintil toma los valores de los lados pero si estos estan vacios ambos
             ; el resultado tambien estara vacio asi que toma los extremos. 
             ;Los otros dos cuartiles entonces tienen los lados vacios. 
             ;Puesto que todo esto puede producir repeticiones de 
             ;valores uso el remove-duplicates para evitarlo
             (if (not (third present-by-quintil))
               (setf (third present-by-quintil)
                     (remove-duplicates (combine (second present-by-quintil) 
                                                 (fourth present-by-quintil)))))
             (if (not (third present-by-quintil))
               (setf (third present-by-quintil)
                     (remove-duplicates (combine (first present-by-quintil) 
                                                 (fifth present-by-quintil)))))
             (if (not (fourth present-by-quintil))
                 (setf (fourth present-by-quintil)
                       (remove-duplicates (combine (third present-by-quintil) 
                                                   (fifth present-by-quintil)))))
             (if (not (second present-by-quintil))
                 (setf (second present-by-quintil)
                       (remove-duplicates (combine (first present-by-quintil) 
                                                   (third present-by-quintil)))))

      
    (setf sampled-by-quintil 
          (map-elements #'sample 
                        present-by-quintil 
                        (map-elements #'length 
                                      missing-by-quintil)
                        :replace))
         
    (setf (select (select data i) (combine (remove nil missing-by-quintil)))
          (select (select data i) (combine (remove nil sampled-by-quintil))))
         
        ) 
   
   (send self :add-dataset-multiple-imputed-list
         (data (send self :name)          
          :created  creator          
          :title     (concatenate 'string "MI-"(send self :title))
          :variables (send self :variables)  
         
          :data     (combine (transpose (map-elements 'coerce data 'list)))
          :labels (send self :labels)
          ))
    )))

(defun quintil-by-var (data-array)
  "Provides a list of lists of observations in each quintil by variable"
  (flet (
         (in-range (x inf sup)
                   (if (and (= inf sup) (= x sup))
                       t
                       (and (>= x inf) (< x sup))))
         )
  (let*
    (
     (data-array (column-list data-array))
     (qu
      (map-elements #'(lambda (x) (quantile x (list 0 .2 .4 .6 .8 1))) data-array))
     (p (length data-array))
     (res nil)
     (firstquintil nil)
     (secondquintil nil)
     (thirdquintil nil)
     (fourthquintil nil)
     (fifthquintil nil)
     
     )
  
   
    (dotimes (i p)
             (setf firstquintil (list (which (map-elements 
                                              #'in-range (select data-array i)
                                              (first (select qu i))
                                              (second (select qu i))))))

           
             
             (setf secondquintil (list (which (map-elements 
                                              #'in-range (select data-array i)
                                              (second (select qu i))
                                              (third (select qu i))))))
             (setf thirdquintil (list (which (map-elements 
                                              #'in-range (select data-array i)
                                              (third (select qu i))
                                              (fourth (select qu i))))))
             (setf fourthquintil (list (which (map-elements 
                                              #'in-range (select data-array i)
                                              (fourth (select qu i))
                                              (fifth (select qu i))))))
             (setf fifthquintil (list (which (map-elements 
                                              #'in-range (select data-array i)
                                              (fifth (select qu i))
                                              1.1)))) ;este valor garantiza que una probabilidad de uno sea considerada
            


             ;Lo siguiente esta basado en que el primer y el ltimo quintil no pueden estar vacios
             ;el tercer quintil toma los valores de los lados pero si estos estan vacios ambos
             ; el resultado tambien estara vacio asi que toma los extremos. 
             ;Los otros dos cuartiles entonces tienen los lados vacios. 
             ;Puesto que todo esto puede producir repeticiones de 
             ;valores uso el remove-duplicates para evitarlo
            

         (setf res  (append res (list (append firstquintil 
                                     secondquintil thirdquintil fourthquintil fifthquintil))))
             
    )
    
   res )))









(defun prep-datos (patterns data)
  "Makes preliminary transformations to data before starting analysis"
  (let* (
         (data data)
         (labels (send current-data :active-labels))
         (patterns patterns)
         (cases-in-missing-patterns 
           (first (cases-in-missing-patterns data)))
         (nvar (array-dimension data 1))
         )   
    (dotimes (i (length patterns))
             
             (if (equal (sum (select patterns i)) 0)
                 (setf cases-to-select (reverse 
                                        (set-difference
                                         (iseq (array-dimension data 0))
                                         (select cases-in-missing-patterns i))
                                        )))

             (if (equal (sum (select patterns i)) 0)
                 (setf data (select  data cases-to-select (iseq nvar))))
             
             (if (equal (sum (select patterns i)) 0)
                 (setf labels (select labels  cases-to-select (iseq nvar))))

            #| (if (equal (sum (select patterns i)) 0) 
                 (add-text *missing-report-window* (format nil "~%~20a  ~5,2f"      
                                         "Number of cases not considered for being fully missing"
                                         (length 
                                          (select cases-in-missing-patterns i))
                                         ) :scroll t))|#
                 )
    (list labels data)))





  
 
